# -*- coding: utf-8 -*-
import scrapy,json,time,random
from scrapy.http import Request
from weixin_data.items import WeixinDataItem
from weixin_data.conf import get_cookie,get_conn

class TextspiderSpider(scrapy.Spider):
    """Spider that backfills the article text for WeChat (mp.weixin.qq.com)
    posts already stored in the `weixin_data` table.

    Flow: pull one row at a time whose `content_text` is still NULL, mark it
    with a sentinel so it is not picked up again, then fetch its
    `content_url` and yield an item carrying the extracted body text.
    """

    name = 'textspider'
    allowed_domains = ['mp.weixin.qq.com']

    def start_requests(self):
        """Generate one Request per unprocessed database row.

        Loops until no row with a NULL `content_text` remains.  Each row is
        pre-marked with the placeholder value "1" BEFORE the request is
        yielded, so a row is never fetched twice — but note this means a
        failed download leaves the sentinel behind instead of NULL.
        """
        self.conn = get_conn()
        cookies = get_cookie()
        while True:
            row = self.conn.execute(
                "select * from `weixin_data` t where t.`content_text` is null"
            ).first()
            if row is None:
                print("处理完成!")
                return
            # Mark the row as claimed so the next loop iteration (or a
            # concurrent run) does not pick it up again.
            sql = "update weixin_data set content_text=:content_text where id=:id"
            self.conn.execute(sql, {"content_text": "1", "id": row["id"]})
            # Randomized delay to avoid triggering WeChat's anti-scraping.
            time.sleep(random.randint(2, 6))
            yield Request(
                row["content_url"],
                meta={"id": row["id"]},
                dont_filter=True,
                cookies=cookies,
                callback=self.parse,
            )

    def parse(self, response):
        """Extract the article body text and yield it as a WeixinDataItem.

        Joins every text node under the `#js_content` container with
        newlines; the item carries the database row id from request meta so
        the pipeline can update the matching row.
        """
        # NOTE(review): the original also sliced a publish_time out of the
        # raw HTML but never used it (and `str.find` returning -1 on a miss
        # would have silently produced garbage) — dead code removed.
        content = '\n'.join(response.css("#js_content *::text").getall())
        item = WeixinDataItem()
        item["content_text"] = content.strip()
        item["id"] = response.meta.get("id")
        yield item